
rm(list=ls())

library(foreign)
library(lme4)
library(bcp)
library(survey)
library(sandwich)

# set up the working directory in your computer
setwd("")

data <- read.dta("imp_combine.dta")

# ----------- handling data -------------- # 
# --- exclude following data
data <- data[!data$year==2006,] # ANES 2006
data <- data[!data$year==2007,] # ANES 2006
data <- data[!is.na(data$year),] 

# --- adjust party id 
data$r_partyid[data$r_partyid>6] <- NA 
data$r_partyid_int <- abs(data$r_partyid-3)

# --- set-up the debate dates in 2004
debate2004 <- as.numeric(as.Date("2004-09-30")) # first debate in 2004
election2004 <- as.numeric(as.Date("2004-11-02")) # election in 2004 

# ===========================================================
# Figure 2.Differences of mean network size by party identification across sample surveys.
# these values come from important matters_analysis_Tables.do file 
# ===========================================================

	list.X.title <- c("GSS 1985", "GSS 1987", "ANES 2000", "GSS 2004","GSS 2010")
	out.tab2 <- NULL 
	out.tab2 <- rbind(out.tab2,c(2.99,2.62))
	out.tab2 <- rbind(out.tab2,c(2.52,2.35))
	out.tab2 <- rbind(out.tab2,c(2.39,1.56))
	out.tab2 <- rbind(out.tab2,c(2.25,1.53))
	out.tab2 <- rbind(out.tab2,c(2.51,2.07))
	
	sig.diff <- c(2,2,1,1,1)
	
	pdf("Figure2.pdf",paper="special",width=5,height=4)
	xid <- c(1,2,3,4,5)
	par(mar=c(3.1,4.1,4.1,2.1))
	plot(xid,xid,type="n",xaxt="n",yaxt="n", ylim=c(1.5,3.5), xlim=c(0.5,5.5),
		xlab="",ylab="Mean Network Size", main="Network Size by Party Identification", cex.main=1)
	points(xid,out.tab2[,2],pch=4,col="black")
	points(xid,out.tab2[,1],pch=1,col="black")
	for (i in 1:length(xid)) lines(rep(xid[i],2),c(out.tab2[i,1],out.tab2[i,2]),col="black", lty=sig.diff[i])
	
	points(xid,out.tab2[,2],type="l", col="red")
	axis(1, xid, list.X.title, cex.axis =0.6)
	axis(2, seq(from=1,to=3.5,by=0.5), seq(from=1,to=3.5,by=0.5), cex.axis = 1)
	
	legend("topright",pch=c(1,4),c("Others","Independent"),bty="n",cex=0.8)	
	dev.off()

# ===========================================================
# Figure 3. Network variation around the first Presidential debate in 2004.
# ===========================================================

# ---------- estimate daily network size / number of participants 
	# --- daily data --- # 
	# number of participants
	# network size : mean 
	# network size : mean : +-1 day moving average 
	# network size : multilevel pooling 
	
	list.series <- list()

	data <- data[!is.na(data$svydate2),]
	year.list <- sort(unique(data$year))
	
	for (rr in 1:length(year.list)){
		year <- year.list[rr]
		rdata <- data[data$year==year,]
		
		time <- as.Date(rdata$svydate2)
		duration <- max(time)-min(time)+1
		
		ns <- rdata$n_size 
		ns <- rdata$n_size 
		a2 <- lmer(n_size~1+(1|svydate2),data=rdata)
	
		ns.pool <- fixef(a2)+ranef(a2)$svydate2[,1]
		time.pool <- as.Date(rownames(ranef(a2)$svydate2))
	
		rmat <- array(NA,dim=c(duration,4,2))
	
		for (t in min(time):max(time)) {
			i <- which(c(min(time):max(time)) %in% t)
			
			loc <- which(time %in% t)
		
				rmat[i,1,1] <- length(loc)
				rmat[i,2,1] <- sum(!is.na(ns[loc]))
				rmat[i,3,1] <- mean(ns[loc],na.rm=TRUE)
			mw <- c(t-1,t,t+1)
			loc.mw <- which(time %in% mw)
	
				rmat[i,1,2] <- length(loc.mw)
				rmat[i,2,2] <- sum(!is.na(ns[loc.mw]))
				rmat[i,3,2] <- mean(ns[loc.mw],na.rm=TRUE)
			loc.pool <- which(time.pool %in% t)
			loc.pool.mw <- which(time.pool %in% mw)
				rmat[i,4,1] <- mean(ns.pool[loc.pool])
				rmat[i,4,2] <- mean(ns.pool[loc.pool.mw])
		} 
		
		rdate <- as.Date(min(time):max(time), origin="1970-01-01")
	
		list.series[[rr]] <- list(rmat=rmat, rdate=rdate)
	}

# --------- Bayesian Change Point models 
	rdata <- data[data$year==2004,]
	
	a2 <- lmer(n_size~1+(1|svydate2),data=rdata)
	out2<- fixef(a2)+ranef(a2)$svydate2[,1]
	
	k.name <- as.Date(rownames(ranef(a2)$svydate2))
	out2 <- out2[order(k.name)]
	
	b2 <- bcp(out2,burnin=2000,mcmc=200000,p0=0.2,w0=0.2)
	
	max.point2 <- b2$data[which(b2$posterior.prob == max(b2$posterior.prob,na.rm=TRUE)),1]
	max.point3 <- b2$data[order(-b2$posterior.prob),][2,1]
	max.point4 <- b2$data[order(-b2$posterior.prob),][5,1]
	
	prb <- b2$posterior.prob
	prm <- b2$posterior.mean


# ---------- plot 
	list.X.title <- c("GSS 1985", "GSS 1987", "ANES 2000", "GSS 2004","ANES 2008","GSS 2010")

	pdf("Figure3.pdf",paper="special",height=10,width=10)
	layout(matrix(1:4,2,2,byrow=TRUE))
	rr <- 4
	a<-list.series[[rr]]
	rmat <- a$rmat 
	rdate <- a$rdate
	
	# ------------ the number of suvery participants
	kk<-2
	plot(rdate,rmat[,kk,1],pch="",ylab="Number of Survey Participants",xlab="Survey Date",
		main="Daily Participants",
		xaxt="n",type="h",lwd=5,col="gray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .5)
	abline(v=debate2004,col="red",lwd=2)
	abline(v=election2004,col="blue",lwd=2)
	abline(v=debate2004-30,col="black",lwd=1,lty=2)
	abline(v=debate2004+30,col="black",lwd=1,lty=2)
	
	legend("topright",lty=c(1,2,1),lwd=c(2,1,2),
		col=c("red","black","blue"),bty="n",
		c("first debate","debate +/- 30","election"))
	legend("topleft","A", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	

	r1 <- 30 
	r2 <- 30 
	range <- as.Date(c(debate2004-r1,debate2004+r2), origin="1970-01-01")
	rmat <- rmat[(range[1]<rdate)&(rdate< range[2]),,]
	rdate <- rdate[(range[1]<rdate)&(rdate< range[2])]
	
	colfunc <- colorRampPalette(c("black", "white"))
	col.part <- c(rev(colfunc(30)),colfunc(30)[2:30])
	
	# --------- network size - V pattern 
	kk<-3
	plot(rdate, rmat[,kk,1],pch=19, xaxt="n", xlim=c(as.Date("2004-09-01"),as.Date("2004-10-31")),
		xlab="", ylab="Mean Network Size",main="Network Size around the First Debate",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .5)
	
	x <- rmat[,kk,1]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.15)
	y.predict <- predict(y.loess, data.frame(x=t))
	
	y.loess1 <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict1 <- predict(y.loess1, data.frame(x=t))
	
	lines(t,y.predict,col="blue",lty=2, lwd=2)
	lines(t,y.predict1,col="blue",lty=1, lwd=2)
	
	
	abline(v=as.Date("2004-09-30"),col="red",lwd=2)
	abline(v=as.Date("2004-10-08"),col="brown",lwd=1)
	abline(v=as.Date("2004-10-13"),col="purple",lwd=1)
	
	legend("topright",lwd=c(2,1,1),
		col=c("red","brown","purple"),bty="n",
		c("first debate","second debate","third debate"))
	
	legend("topleft","B", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	plot(k.name,prb, type="l",xlim=c(as.Date("2004-09-01"),as.Date("2004-10-31")),
		main="Posterior Probability of Change Point", 
	 xlab="Date", xaxt="n", ylab="Posterior Probability")
	
	abline(v=as.Date(k.name[max.point2], origin="1970-01-01"),col="blue")
	abline(v=as.Date(k.name[max.point3], origin="1970-01-01"),col="blue",lty=2)
	abline(v=as.Date(k.name[max.point4], origin="1970-01-01"),col="blue",lty=2)
	abline(v=debate2004,col="red",lwd=2)
	axis(1, k.name, format(as.Date(k.name,origin="1970-01-01"), "%m/%d"), cex.axis = .8)
	
	legend("topright",c("1st debate (09/30)","max. cp (09/28)","2nd max. cp(09/08,10/23)"),pch=NA,lty=c(1,1,2),col=c("red","blue","blue"), bty="n")
	legend("topleft","C", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	plot(k.name,prm, type="l",xlim=c(as.Date("2004-09-01"),as.Date("2004-10-31")),
	 main="Posterior Mean of Network Size", 
	 xlab="Date", xaxt="n", ylab="Posterior Mean")
	abline(v=as.Date(k.name[max.point2], origin="1970-01-01"),col="blue")
	abline(v=as.Date(k.name[max.point3], origin="1970-01-01"),col="blue",lty=2)
	abline(v=as.Date(k.name[max.point4], origin="1970-01-01"),col="blue",lty=2)
	abline(v=debate2004,col="red",lwd=2)
	legend("topright",c("1st debate (09/30)","max. cp (09/28)","2nd max. cp(09/08,10/23)"),pch=NA,lty=c(1,1,2),col=c("red","blue","blue"), bty="n")
	
	axis(1, k.name, format(as.Date(k.name,origin="1970-01-01"), "%m/%d"), cex.axis = .8)
	legend("bottomright",c("1st debate (09/30)","max. cp (09/28)","2nd max. cp(09/08,10/23)"),pch=NA,lty=c(1,1,2),col=c("blue","red","red"), bty="n")
	legend("topleft","D", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	dev.off()

# ===========================================================
# Figure 4. The interaction effects of priming with political engagement at the individual level
# ===========================================================

	rdata <- data[data$year==2004,]
	rdata <- rdata[!is.na(rdata$n_size),]
	
	time <- as.Date(rdata$svydate2)
	duration <- max(time)-min(time)+1
	
	ns <- rdata$n_size 
	i1 <- rdata$r_pol_interest+1
	i2 <- rdata$r_pol_discuss+1
	
# estimate network size across different subgroups across time 	
# political interests
	rmat1 <- array(NA,dim=c(duration,5,2))
	for (t in min(time):max(time)) {
		i <- which(c(min(time):max(time)) %in% t)
	
		loc <- which(time %in% t)
			for (k in 1:4) rmat1[i,k,1] <- mean(ns[loc][i1[loc]==k&!is.na(i1[loc])],na.rm=TRUE)
						   rmat1[i,5,1] <- mean(ns[loc][i1[loc]< 4&!is.na(i1[loc])],na.rm=TRUE)
		mw <- c(t-1,t,t+1)
		loc.mw <- which(time %in% mw)
			for (k in 1:4) rmat1[i,k,2] <- mean(ns[loc.mw][i1[loc.mw]==k&!is.na(i1[loc.mw])],na.rm=TRUE)
						   rmat1[i,5,2] <- mean(ns[loc.mw][i1[loc.mw]< 4&!is.na(i1[loc.mw])],na.rm=TRUE)
	} 
	
	rdate1 <- as.Date(min(time):max(time), origin="1970-01-01")

# political talking frequency
	rmat2 <- array(NA,dim=c(duration,5,2))
	for (t in min(time):max(time)) {
		i <- which(c(min(time):max(time)) %in% t)
	
		loc <- which(time %in% t)
			for (k in 1:4) rmat2[i,k,1] <- mean(ns[loc][i2[loc]==k&!is.na(i2[loc])],na.rm=TRUE)
						   rmat2[i,5,1] <- mean(ns[loc][i2[loc]< 4&!is.na(i2[loc])],na.rm=TRUE)
		mw <- c(t-1,t,t+1)
		loc.mw <- which(time %in% mw)
			for (k in 1:4) rmat2[i,k,2] <- mean(ns[loc.mw][i2[loc.mw]==k&!is.na(i2[loc.mw])],na.rm=TRUE)
						   rmat2[i,5,2] <- mean(ns[loc.mw][i2[loc.mw]< 4&!is.na(i2[loc.mw])],na.rm=TRUE)
	} 
	
	rdate2 <- as.Date(min(time):max(time), origin="1970-01-01")

	# ---------- plot 
	pdf("Figure4.pdf",paper="special",width=9,height=5)
	layout(matrix(1:2,1,2))
	debate2004 <- as.numeric(as.Date("2004-09-30")) # first debate in 2004
	debate <- debate2004 
	
	r <- 30
	range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
	
	rmat <- rmat1[(range[1]<rdate1)&(rdate1< range[2]),,2]
	rdate <- rdate1[(range[1]<rdate1)&(rdate1< range[2])]
	plot(rdate, rmat[,1],pch=19,ylim=c(.7,3.2), xaxt="n",type="n",
		xlab="Survey Dates", ylab="Network Size",main="Political Interests",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .8)
	
	kk <- 5
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	points(rdate,x,col="skyblue",pch=20)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="blue",lty=1, lwd=2)
	
	
	kk <- 4
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	points(rdate,x,col="pink",pch=20)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="red",lty=1, lwd=2)
	
	
	abline(v=as.Date("2004-09-30"),col="black",lwd=2)
		
	legend("topright",col=c("red","blue"),bty="n",pch=20,
		c("High","Low"),cex=1)
		
	legend("topleft","A", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	r <- 30
	range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
	
	rmat <- rmat2[(range[1]<rdate2)&(rdate2< range[2]),,2]
	rdate <- rdate2[(range[1]<rdate2)&(rdate2< range[2])]
	plot(rdate, rmat[,1],pch=19,ylim=c(.7,3.2), xaxt="n",type="n",
		xlab="Survey Dates", ylab="Network Size",main="Political Discussion",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .8)
	
	kk <- 5
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	points(rdate,x,col="skyblue",pch=20)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="blue",lty=1, lwd=2)
	
	
	kk <- 4
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	points(rdate,x,col="pink",pch=20)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="red",lty=1, lwd=2)
	
	
	abline(v=as.Date("2004-09-30"),col="black",lwd=2)
		
	legend("topright",col=c("red","blue"),bty="n",pch=20,
		c("High","Low"),cex=1)
		
	legend("topleft","B", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	dev.off()

# ===========================================================
# Figure 5. Priming and spatial political polarization in 2004
# ===========================================================

	rdata <- data[data$year==2004,]
	
	time <- as.Date(rdata$svydate2)
	duration <- max(time)-min(time)+1
	
	# psu-unit identier : sampcode 
	sampcode <- rdata$sampcode
	list.sampcode <- sort(unique(sampcode))
	
	# ----------three different measures for geographic polarization
	# 1. % moderate
	# 2. kurtosis on political ideology
	# 3. partisan sorting (correlation)
	
	geo.polarization <- array(NA,dim=c(length(list.sampcode),4))
	
	for (j in 1:length(list.sampcode)){
		jj <- list.sampcode[j]
		geo.partyid <- rdata$r_partyid[rdata$sampcode==jj]+1
		geo.ideology <- rdata$r_ideology[rdata$sampcode==jj]
	
		geo.polarization[j,1] <- jj
		geo.polarization[j,2] <- mean(geo.ideology==4,na.rm=TRUE)
		a<-geo.ideology[!is.na(geo.ideology)]
		geo.polarization[j,3] <- sum((a-mean(a))^4)/(length(a)*sd(a)^4)-3
		geo.polarization[j,4] <- cor.test(geo.partyid,geo.ideology)$estimate
		
	}
	
	for (j in 1:nrow(geo.polarization)){
		jj <- geo.polarization[j,1]
		rdata$geo.pol1[rdata$sampcode==jj] <- geo.polarization[j,2] 
		rdata$geo.pol2[rdata$sampcode==jj] <- geo.polarization[j,3] 
		rdata$geo.pol3[rdata$sampcode==jj] <- geo.polarization[j,4] 
	}
	
# below function is to calculate mean network size across subgroups across time
	vmat.out <- function(rdata=rdata,var=var,...){
		time <- rdata$svydate2 
		time <- as.Date(time)
		duration <- max(time)-min(time)+1
		
		ns <- rdata$n_size 
		x <- rdata[,var]
		
		avg <- mean(x,na.rm=TRUE)
		high <- ifelse(x>avg,1,0)
		
		rmat <- array(NA,dim=c(duration,3,2))
		for (t in min(time):max(time)) {
			i <- which(c(min(time):max(time)) %in% t)
			
			loc <- which(time %in% t)
			mw <- c(t-1,t,t+1)
			loc.mw <- which(time %in% mw)
			if (length(loc)>0){
				rmat[i,1,1] <- length(loc)
				rmat[i,2,1] <- mean(ns[loc][high[loc]==1&!is.na(high[loc])],na.rm=TRUE)
				rmat[i,3,1] <- mean(ns[loc][high[loc]==0&!is.na(high[loc])],na.rm=TRUE)
			} else {
				rmat[i,1,1] <- 0	
			}
			
			if (length(loc.mw)>0){
				rmat[i,1,2] <- length(loc.mw)
				rmat[i,2,2] <- mean(ns[loc.mw][high[loc.mw]==1&!is.na(high[loc.mw])],na.rm=TRUE)
				rmat[i,3,2] <- mean(ns[loc.mw][high[loc.mw]==0&!is.na(high[loc.mw])],na.rm=TRUE)
			} else {
				rmat[i,1,2] <- 0
			}
		}
		
		return(list(rmat=rmat,time=c(min(time):max(time))))
	}
	

# ------- plot 
	pdf("Figure5.pdf",paper="special",height=6,width=9)
	layout(matrix(1:6,2,3,byrow=FALSE))

	temp <- vmat.out(rdata=rdata, var="geo.pol1")
	rmat <- temp$rmat
	rdate <- as.Date(temp$time,origin="1970-01-01")

	debate <- debate2004
	r <- 30 

	range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
	rmat <- rmat[(range[1]<rdate)&(rdate< range[2]),,2]
	rdate <- rdate[(range[1]<rdate)&(rdate< range[2])]

	par(mar=c(5.1,4.1,4.1,2.1))
	pid <- geo.polarization[,2]
	h <- hist(pid,plot=FALSE)
	cuts <- cut(h$breaks, c(-Inf,mean(pid,na.rm=TRUE),Inf))
	cols <- ifelse(as.numeric(cuts)==1,"red","blue")
	plot(h,col=cols,xlab="Ideology : % Moderate in PSUs",main="% Moderate")
	legend("topright",col=c("red","blue"),pch=19,c("High Polarization","Low Polarization"),bty="n")
	legend("topleft","A", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)

	par(mar=c(3.1,4.1,1.1,2.1))
	kk <- 1;
	plot(rdate, rmat[,kk],pch=19,ylim=c(0,3.2), xaxt="n", type="n",
		xlab="Survey Dates", ylab="Network Size",main="",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .8)

	kk <- 3;points(rdate, rmat[,kk],pch=19,col="pink")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="red",lty=1, lwd=2)

	kk <- 2;
	points(rdate, rmat[,kk],pch=19,col="skyblue")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="blue",lty=1, lwd=2)

	abline(v=debate,col="black",lwd=2)
	text(x=debate-15,y=0.3,  paste("First Debate =","09/30",sep=""),col="black")
	legend("topleft","D", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)


	temp <- vmat.out(rdata=rdata, var="geo.pol2")
	rmat <- temp$rmat
	rdate <- as.Date(temp$time,origin="1970-01-01")

	debate <- debate2004
	r <- 30 

	range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
	rmat <- rmat[(range[1]<rdate)&(rdate< range[2]),,2]
	rdate <- rdate[(range[1]<rdate)&(rdate< range[2])]

	par(mar=c(5.1,4.1,4.1,2.1))
	pid <- geo.polarization[,3]
	h <- hist(pid,plot=FALSE)
	cuts <- cut(h$breaks, c(-Inf,mean(pid,na.rm=TRUE),Inf))
	cols <- ifelse(as.numeric(cuts)==1,"red","blue")
	plot(h,col=cols,xlab="Kurtosis of Ideology Scores in PSUs",main="Kurtosis of Political Ideology")
	#legend("topright",col=c("red","blue"),pch=19,c("High Polarization","Low Polarization"),bty="n")
	legend("topleft","B", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)

	par(mar=c(3.1,4.1,1.1,2.1))
	kk <- 1;
	plot(rdate, rmat[,kk],pch=19,ylim=c(0,3.2), xaxt="n", type="n",
		xlab="Survey Dates", ylab="Network Size",main="",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .8)

	kk <- 3;points(rdate, rmat[,kk],pch=19,col="pink")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="red",lty=1, lwd=2)


	kk <- 2;
	points(rdate, rmat[,kk],pch=19,col="skyblue")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="blue",lty=1, lwd=2)

	abline(v=debate,col="black",lwd=2)
	text(x=debate-15,y=0.3,  paste("First Debate =","09/30",sep=""),col="black")
	legend("topleft","E", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)


	temp <- vmat.out(rdata=rdata, var="geo.pol3")
	rmat <- temp$rmat
	rdate <- as.Date(temp$time,origin="1970-01-01")

	debate <- debate2004
	r <- 30 

	range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
	rmat <- rmat[(range[1]<rdate)&(rdate< range[2]),,2]
	rdate <- rdate[(range[1]<rdate)&(rdate< range[2])]

	par(mar=c(5.1,4.1,4.1,2.1))
	pid <- geo.polarization[,4]
	h <- hist(pid,plot=FALSE)
	cuts <- cut(h$breaks, c(-Inf,mean(pid,na.rm=TRUE),Inf))
	cols <- ifelse(as.numeric(cuts)==1,"blue","red")
	plot(h,col=cols,main="Party Sorting",xlab="Correlation between Party ID and Ideology")
	legend("topleft","C", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)

	par(mar=c(3.1,4.1,1.1,2.1))
	kk <- 1;
	plot(rdate, rmat[,kk],pch=19,ylim=c(0,3.2), xaxt="n", type="n",
		xlab="Survey Dates", ylab="Network Size",main="",col="darkgray")
	axis(1, rdate, format(rdate, "%m/%d"), cex.axis = .8)

	kk <- 2;points(rdate, rmat[,kk],pch=19,col="pink")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="red",lty=1, lwd=2)


	kk <- 3;
	points(rdate, rmat[,kk],pch=19,col="skyblue")
	x <- rmat[,kk]
	t <- as.numeric(rdate)
	y.loess <- loess(y~x,data.frame(x=t,y=x),span=0.4)
	y.predict <- predict(y.loess, data.frame(x=t))
	lines(t,y.predict,col="blue",lty=1, lwd=2)

	abline(v=debate,col="black",lwd=2)
	text(x=debate-15,y=0.3,  paste("First Debate =","09/30",sep=""),col="black")
	legend("topleft","F", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)


	dev.off()

# ===========================================================
# Table B3. Tests on linearity assumptions.
# ===========================================================

	list.r <- c(1:21)
	linear.test <- array(NA, dim=c(length(list.r),7))
	
	rdata <- data[data$year==2004,]
	debate <- debate2004
	
	for (r in list.r){
		range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
		
		rdate <- rdata$svydate2
		x <- rdata[(range[1]<=rdate)&(rdate<=range[2]),]
		t <- as.numeric(rdate[(range[1]<=rdate)&(rdate<=range[2])])
		
		# absolute difference daxte
		abs.debate <- abs(as.numeric(debate)-t)
		x$abs.debate <- abs.debate
		x$t <- t
		
		out5a <- glmer(as.formula(n_size ~abs.debate+(1|t)), data=x,family=poisson(link="log"),control=glmerControl(optimizer="bobyqa"))
		out5b <- glmer(as.formula(n_size ~as.factor(abs.debate)+(1|t)), data=x,family=poisson(link="log"),control=glmerControl(optimizer="bobyqa"))
		a<-anova(out5a,out5b, test="Chisq")

		linear.test[r,1] <- a[,"Df"][1] 
		linear.test[r,2] <- a[,"logLik"][1] 
		linear.test[r,3] <- a[,"Df"][2] 
		linear.test[r,4] <- a[,"logLik"][2] 
		linear.test[r,5] <- a[,"Chisq"][2] 
		linear.test[r,6] <- a[,"Chi Df"][2] 
		linear.test[r,7] <- a[,"Pr(>Chisq)"][2] 	
	}

	linear.test <- linear.test[,c(2,1,4,3,5,6,7)]
	
	colnames(linear.test) <- c("Linear(LL)","Linear(DF)","Dummy(LL)","Dummy(DF)","Chisq","DF","p-value")
	rownames(linear.test) <- list.r 
	
	write.csv(linear.test, file="TableB3.csv")

# ===========================================================
# Figure B1. Permutaion Tests
# ===========================================================

	rdata <- data[data$year==2004,]

	list.r <- c(6:21)
	permute.test <- array(NA, dim=c(2,length(list.r)))
	N.sim <- 1000
	sim.permute.test <- array(NA, dim=c(length(list.r),N.sim))
	
	debate <- debate2004
	
	for (r in list.r){
		range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
		
		rdate <- rdata$svydate2
		x <- rdata[(range[1]<=rdate)&(rdate<=range[2]),]
		t <- as.numeric(rdate[(range[1]<=rdate)&(rdate<=range[2])])
		
		# absolute difference daxte
		abs.debate <- abs(as.numeric(debate)-t)
		x$abs.debate <- abs.debate
		x$t <- t
		
		out1<-glm(as.formula(n_size ~abs.debate), data=x,family="poisson")
	#	cov.m1 <- vcovHC(out1, type="HC0")
	#	std.err <- sqrt(diag(cov.m1))	
		out2<-glm(as.formula(n_size ~abs.debate + r_age+r_female+r_black+r_others+r_educ+r_married+r_nchilds+as.factor(r_wrkstat)),data=x,family="poisson")
		
		b1 <- summary(out1)$coefficients[2,1]
		b2 <- summary(out2)$coefficients[2,1]
		
		permute.test[,which(list.r %in% r)] <- c(b1,b2)
		
	
		# let's do simulation here; using permuation
		for (s in 1:N.sim){
			x <- rdata[(range[1]<=rdate)&(rdate<=range[2]),]
			t <- as.numeric(rdate[(range[1]<=rdate)&(rdate<=range[2])])
			t <- sample(t)
			
			# absolute difference daxte
			abs.debate <- abs(as.numeric(debate)-t)
			x$abs.debate <- abs.debate
			x$t <- t
			
			out1<-glm(as.formula(n_size ~abs.debate), data=x,family="poisson")	
			b1 <- summary(out1)$coefficients[2,1]
			sim.permute.test[which(list.r %in% r),s] <- c(b1)
		} # for s 
	} # r
	
	
	save(permute.test, sim.permute.test, file="per_imp_1027.RData")
	
	load("per_imp_1027.RData")	
	list.r <- c(6:21)
	pdf("FigureB1.pdf",paper="special",width=12,height=10)
	layout(matrix(1:16,4,4,byrow=TRUE))
	for (r in list.r){
		beta.sim <- sim.permute.test[which(list.r%in%r),]
		hist(beta.sim, main=paste("Moving Caliper = ",r,sep=""), xlab="Simulated distribution of debate effects")
		abline(v=permute.test[1,which(list.r%in%r)], col="gray", lwd=2, lty=1)
		abline(v=permute.test[2,which(list.r%in%r)], col="black", lwd=2, lty=2)
		abline(v=sort(beta.sim)[1000*0.95], col="red",lwd=2,lty=1)
	}
	dev.off()


# ===========================================================
# Figure B2. Balancing Tests 
# ===========================================================

# respecification for some variables 
	rdata <- data[data$year==2004,]
	rdata$r_work1 <- ifelse(rdata$r_wrkstat=="Employed/Looking",1,0)
	rdata$r_work2 <- ifelse(rdata$r_wrkstat=="Retired",1,0)
	rdata$r_work3 <- ifelse(rdata$r_wrkstat=="Homemaker",1,0)
	
	list.var.all.res <- c("r_age","r_female","r_white","r_black","r_others","r_educ","r_married",
		"r_nchilds","r_work1","r_work2","r_work3",
		"r_partyid","r_partyid_int",
		"i_saq_skip_gene","i_uncoop","i_poorcomprend","i_length")
	
	list.var.all.int <- c("i_female","i_age","i_white","i_black","i_others","i_tenure")
	
	list.r <- c(6:21)
	permute.test.res <- array(NA, dim=c(length(list.var.all.res),length(list.r)))
	permute.test.int <- array(NA, dim=c(length(list.var.all.int),length(list.r)))
	
	debate <- debate2004 

# set-up the survey design	
	wt <- rdata$wtall
	formwt <- rdata$formwt 
	oversamp <- rdata$oversamp 
	sample <- rdata$sample 
	samplerc <- ifelse(sample %in% 3:4, 3, 
				ifelse(sample %in% 6:7, 6, sample))
	sampcode <- rdata$sampcode 
	rdata$compwt <- wt*formwt*oversamp 
	rdata$samplerc <- samplerc 
	
# run simulations 	
	for (var in list.var.all.res){
		for (r in list.r){	
			range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
			rdate <- rdata$svydate2
			rdata$abs.date <- abs(as.numeric(rdate)-debate)
			rdata$xi <- as.numeric(range[1]<=rdate)&(rdate<= range[2])
			sv.design <- svydesign(id=~sampcode,
							   strata=~samplerc,
							   data=subset(rdata, !is.na(sampcode)),
							   weights=~compwt,
							   nest=TRUE)
		
			a <-svyttest(as.formula(paste(var,"~xi",sep="")), design=sv.design)
		
			permute.test.res[which(list.var.all.res %in% var),which(list.r %in% r)] <- a$statistic
		
		}
	}
	
	
	for (var in list.var.all.int){
		for (r in list.r){	
		range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
		rdate <- rdata$svydate2
		rdata$abs.date <- abs(as.numeric(rdate)-debate)
	
	
		rdata$xi <- as.numeric(range[1]<=rdate)&(rdate<= range[2])
		sv.design <- svydesign(id=~sampcode,
						   strata=~samplerc,
						   data=subset(rdata, !is.na(sampcode)),
						   weights=~compwt,
						   nest=TRUE)
	
		a <-svyttest(as.formula(paste(var,"~xi",sep="")), design=sv.design)
	
	
		permute.test.int[which(list.var.all.int %in% var),which(list.r %in% r)] <- a$statistic
		}
	}
	
	
	bal.out1 <- permute.test.res
	bal.out2 <- permute.test.int
	
# -------- plot 	
	list.varname.res <- c("Age","Female","White","Black","Other race","Years of education","Married",
		"Number of childs","Employed/Looking","Retired","Homemaker",
		"Party Identification","Partysan Strength",
		"SAQ Gene Skip","Uncooperativeness","Poor Comprehension","Interview Length")
	
	list.varname.int <- c("Female","Age","White","Black","Other race","Tenure")
	
	pdf("FigureB2.pdf",paper="special",width=8,height=8)
	layout(matrix(1:4,2,2,byrow=TRUE),
		widths=c(2,1))
	
	par(mar=c(5.1, 4.1, 4.1, 2.1))
	
	plot(list.r,bal.out1[1,],pch=19,ylim=c(-5,5),type="n",xlim=c(5,21),
		xlab="Moving Window",
		ylab=expression(paste("T-statistics for ",mu[window]-mu[rest],sep="")),
		main="Respondent Characteristics")
	for (i in 1:length(list.varname.res)) {
		col.bal <- ifelse(abs(bal.out1[i,])>2,"red","gray")
		points(list.r,bal.out1[i,],type="p",pch=i,col=col.bal)
	}
	
	abline(h=c(-2,2),lty=2)
	legend("topleft","A", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	par(mar=c(1, 1, 1, 1))
	plot(list.r,list.r,type="n",
		xlab="",ylab="",xaxt="n",yaxt="n",bty="n")
	legend("center",inset=c(-0.3,0),list.varname.res,pch=c(1:length(list.var.all.res)),bty="n",cex=1)
	
	par(mar=c(5.1, 4.1, 4.1, 2.1))
	plot(list.r,bal.out2[1,],pch=19,ylim=c(-5,5),type="n",xlim=c(5,21),
		xlab="Moving Window",
		ylab=expression(paste("T-statistics for ",mu[window]-mu[rest],sep="")),
		main="Interviewer Characteristics")
	for (i in 1:length(list.varname.int)) {
		col.bal <- ifelse(abs(bal.out2[i,])>2,"red","gray")
		points(list.r,bal.out2[i,],type="p",pch=i,col=col.bal)
	}
	
	abline(h=c(-2,2),lty=2)
	legend("topleft","B", bty="n",cex=2, x.intersp=-0.5,y.intersp=0.2,pch=NA)
	
	par(mar=c(1, 1, 1, 1))
	plot(list.r,list.r,type="n",
		xlab="",ylab="",xaxt="n",yaxt="n",bty="n")
	legend("center",inset=c(-0.3,0),list.varname.int,pch=c(1:length(list.var.all.int)),bty="n",cex=1)
	
	dev.off()

# ===========================================================
# Figure B3. Event-moving Tests 
# ===========================================================

	rdata <- data[data$year==2004,]

	list.r <- c(6:21)
	event.test <- array(NA, dim=c(2,length(list.r)))
	list.date <- sort(unique(rdata$svydate2))
	list.sim.event.test <- list()
	debate <- debate2004
	
	for (r in list.r){
		range <- as.Date(c(debate-r,debate+r), origin="1970-01-01")
		
		rdate <- rdata$svydate2
		x <- rdata[(range[1]<=rdate)&(rdate<=range[2]),]
		t <- as.numeric(rdate[(range[1]<=rdate)&(rdate<=range[2])])
		
		# absolute difference daxte
		abs.debate <- abs(as.numeric(debate)-t)
		x$abs.debate <- abs.debate
		x$t <- t
		
		out1<-glm(as.formula(n_size ~abs.debate), data=x,family="poisson")
		out2<-glm(as.formula(n_size ~abs.debate + r_age+r_female+r_black+r_others+r_educ+r_married+r_nchilds+as.factor(r_wrkstat)),data=x,family="poisson")
		
		b1 <- summary(out1)$coefficients[2,3]
		b2 <- summary(out2)$coefficients[2,3]
		
		event.test[,which(list.r %in% r)] <- c(b1,b2)
	
		# let's check all dates per each caliper
		N.sim <- length(list.date)-2*r
		sim.event.test <- array(NA, dim=c(2,N.sim))

		for (s in 1:N.sim){
			s.debate <- list.date[s+r]
			range <- as.Date(c(s.debate-r,s.debate+r), origin="1970-01-01")
			rdate <- rdata$svydate2

			x <- rdata[(range[1]<=rdate)&(rdate<=range[2]),]
			t <- as.numeric(rdate[(range[1]<=rdate)&(rdate<=range[2])])
			
			# absolute difference daxte
			abs.debate <- abs(as.numeric(s.debate)-t)
			x$abs.debate <- abs.debate
			x$t <- t

			out1<-glm(as.formula(n_size ~abs.debate), data=x,family="poisson")	
			out2<-glm(as.formula(n_size ~abs.debate + r_age+r_female+r_black+r_others+r_educ+r_married+r_nchilds+as.factor(r_wrkstat)),data=x,family="poisson")
			b1 <- summary(out1)$coefficients[2,3]
			b2 <- summary(out2)$coefficients[2,3]

			sim.event.test[1,s] <- b1
			sim.event.test[2,s] <- b2

		} # for s 
		list.sim.event.test[[which(list.r %in% r)]] <- sim.event.test
	
	} # r


# -------- plot 
pdf("FigureB3.pdf",paper="special",width=12,height=10)
layout(matrix(1:16,4,4,byrow=TRUE))
for (r in list.r){
	beta <- event.test[2,which(list.r%in%r)]
	sim.event.test <- list.sim.event.test[[which(list.r%in%r)]]
	beta.sim <- sim.event.test[2,]
	pch.sim <- ifelse(beta.sim>beta,19,1)
	
	xid <- list.date[c(r+1):c(length(list.date)-r)]
	if ((r-6) %% 4 == 0) {
		par(mar=c(5.1,4.1,2.1,2.1))
		plot(xid,beta.sim,xaxt="n",xlab="Survey Date",ylab="Z-score for each date's effect",main=paste("Moving Window = ",r,sep=""),pch=pch.sim,ylim=c(-4,4))	
	} else {
		par(mar=c(5.1,4.1,2.1,2.1))
		plot(xid,beta.sim,xaxt="n",xlab="Survey Date",ylab="Z-score",main=paste("Moving Window = ",r,sep=""),pch=pch.sim,ylim=c(-4,4))	
	}
	
	points(debate,beta,col="red",pch=19)
	abline(h=beta,col="red")
	abline(h=1.96,col="black",lty=2)
	axis(1, xid, format(xid, "%m/%d"), cex.axis = .8)
}

dev.off()



